library(scran)
library(scater)
library(DropletUtils)
library(openxlsx)
library(Rtsne)
library(umap)
library(RColorBrewer)
source("~/Dropbox/Postdoc/git/BE2020/Analysis/Functions/auxiliary.R")
sce.list <- readRDS("~/Dropbox/Postdoc/2019-12-29_BE2020/All_sce_filtered_high_quality.rds")
sce.list <- lapply(sce.list, function(n){
rowData(n) <- rowData(n)[,1:2]
n
})
# Order sce objects for batch correction
sce.list <- sce.list[order(unlist(lapply(sce.list, ncol)), decreasing = TRUE)]
n <- names(sce.list)
sce.list <- sce.list[c(which(grepl("NSCJ", n)), which(grepl("BSCJ", n)),
which(grepl("NE", n)), which(grepl("NG", n)),
which(grepl("BE", n)), which(grepl("ND", n)),
which(grepl("SMG", n)))]
# Combine sce objects
sce <- do.call("cbind", sce.list)
# Batch correction
corrected <- batch.correction(sce.list)
# Save batch corrected counts in metdata
metadata(sce)$corrected <- corrected
# Compute tsne on corrected counts
set.seed(1234)
tsne <- Rtsne(t(corrected), pca = FALSE)
umap <- umap(t(corrected))
# Store tsne in slot
reducedDims(sce)$TSNE <- tsne$Y[,1:2]
reducedDims(sce)$umap <- umap$layout[,1:2]
# Clustering on corrected data
g <- buildSNNGraph(corrected, k = 10)
clusters <- igraph::cluster_louvain(g)$membership
# Save clustering in new slot
colData(sce)$Global_cluster <- clusters
# Visualize clustering
ggplot(data.frame(tSNE1 = tsne$Y[,1],
tSNE2 = tsne$Y[,2],
clusters = as.factor(clusters))) +
geom_point(aes(tSNE1, tSNE2, colour = clusters), size = 0.5)
# Tissue
ggplot(data.frame(tSNE1 = tsne$Y[,1],
tSNE2 = tsne$Y[,2],
tissue = as.factor(colData(sce)$Tissue))) +
geom_point(aes(tSNE1, tSNE2, colour = tissue), size = 0.5) +
scale_color_brewer(palette = "Set1")
ggplot(data.frame(UMAP1 = umap$layout[,1],
UMAP2 = umap$layout[,2],
tissue = as.factor(colData(sce)$Tissue))) +
geom_point(aes(UMAP1, UMAP2, colour = tissue), size = 0.5) +
scale_color_brewer(palette = "Set1")
# Patient
ggplot(data.frame(tSNE1 = tsne$Y[,1],
tSNE2 = tsne$Y[,2],
patient = as.factor(colData(sce)$Patient))) +
geom_point(aes(tSNE1, tSNE2, colour = patient), size = 0.5) +
scale_color_manual(values = c(brewer.pal(8, "Set1"), brewer.pal(8, "Set3")))
ggplot(data.frame(UMAP1 = umap$layout[,1],
UMAP2 = umap$layout[,2],
patient = as.factor(colData(sce)$Patient))) +
geom_point(aes(UMAP1, UMAP2, colour = patient), size = 0.5) +
scale_color_manual(values = c(brewer.pal(8, "Set1"), brewer.pal(8, "Set3")))
# Patient and tissue
qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',]
col_vector = unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals)))
ggplot(data.frame(tSNE1 = tsne$Y[,1],
tSNE2 = tsne$Y[,2],
all = as.factor(paste(colData(sce)$Tissue, colData(sce)$Patient)))) +
geom_point(aes(tSNE1, tSNE2, colour = all), size = 0.5) +
scale_color_manual(values = col_vector)
ggplot(data.frame(UMAP1 = umap$layout[,1],
UMAP2 = umap$layout[,2],
all = as.factor(paste(colData(sce)$Tissue, colData(sce)$Patient)))) +
geom_point(aes(UMAP1, UMAP2, colour = all), size = 0.5) +
scale_color_manual(values = col_vector)
# Plot all patients individually
# Generate patient colour vector
patient_vector <- c(brewer.pal(8, "Set1"), brewer.pal(8, "Set3"))[1:length(unique(sce$Patient))]
names(patient_vector) <- unique(sce$Patient)
for(i in c("NE", "NSCJ", "BSCJ", "BE", "NG", "ND", "SMG")){
cur_tsne <- tsne$Y[sce$Tissue == i,]
cur_patient <- sce$Patient[sce$Tissue == i]
print(ggplot(data.frame(tSNE1 = cur_tsne[,1],
tSNE2 = cur_tsne[,2],
patient = cur_patient)) +
geom_point(aes(tSNE1, tSNE2, colour = patient), size = 0.5) +
scale_color_manual(values = patient_vector) + ggtitle(i))
}
table(colData(sce)$Patient, colData(sce)$Tissue)
##
## BE BSCJ ND NE NG NSCJ SMG
## Patient01 0 0 0 2961 189 153 0
## Patient02 0 0 0 5611 1280 2014 0
## Patient03 737 1091 0 749 504 0 0
## Patient04 0 0 0 0 0 2650 0
## Patient05 0 0 0 1928 0 1344 0
## Patient06 0 0 0 733 94 1250 0
## Patient07 1448 764 570 998 599 0 0
## Patient08 0 0 806 921 135 798 0
## Patient09 361 529 545 659 217 0 0
## Patient10 0 0 0 2103 139 867 821
## Patient11 0 0 0 0 0 0 1911
## Patient12 0 0 1653 0 0 0 0
## Patient13 0 0 0 0 898 2626 383
## Patient14 494 0 0 0 0 0 0
colSums(table(colData(sce)$Patient, colData(sce)$Tissue))
## BE BSCJ ND NE NG NSCJ SMG
## 3040 2384 3574 16663 4055 11702 3115
# Perform differential expression
markers <- findMarkers(sce, groups = colData(sce)$Global_cluster,
block = paste(colData(sce)$Patient, colData(sce)$Tissue, sep = "_"))
## Warning in .pairwise_blocked_template(x, clust.vals, nblocks, direction =
## direction, : no within-block comparison between 19 and 1
## Warning in .pairwise_blocked_template(x, clust.vals, nblocks, direction =
## direction, : no within-block comparison between 19 and 2
## Warning in .pairwise_blocked_template(x, clust.vals, nblocks, direction =
## direction, : no within-block comparison between 19 and 3
## Warning in .pairwise_blocked_template(x, clust.vals, nblocks, direction =
## direction, : no within-block comparison between 19 and 5
## Warning in .pairwise_blocked_template(x, clust.vals, nblocks, direction =
## direction, : no within-block comparison between 19 and 6
## Warning in .pairwise_blocked_template(x, clust.vals, nblocks, direction =
## direction, : no within-block comparison between 19 and 7
## Warning in .pairwise_blocked_template(x, clust.vals, nblocks, direction =
## direction, : no within-block comparison between 19 and 8
## Warning in .pairwise_blocked_template(x, clust.vals, nblocks, direction =
## direction, : no within-block comparison between 19 and 9
## Warning in .pairwise_blocked_template(x, clust.vals, nblocks, direction =
## direction, : no within-block comparison between 19 and 10
## Warning in .pairwise_blocked_template(x, clust.vals, nblocks, direction =
## direction, : no within-block comparison between 19 and 11
## Warning in .pairwise_blocked_template(x, clust.vals, nblocks, direction =
## direction, : no within-block comparison between 19 and 15
## Warning in .pairwise_blocked_template(x, clust.vals, nblocks, direction =
## direction, : no within-block comparison between 19 and 16
## Warning in .pairwise_blocked_template(x, clust.vals, nblocks, direction =
## direction, : no within-block comparison between 25 and 19
## Warning in .pairwise_blocked_template(x, clust.vals, nblocks, direction =
## direction, : no within-block comparison between 26 and 15
## Warning in .pairwise_blocked_template(x, clust.vals, nblocks, direction =
## direction, : no within-block comparison between 26 and 19
markers.spec <- lapply(markers, function(n){
if(!is.na(n$Top[1]) & !is.nan(sum(as.matrix(n[1,4:ncol(n)])))){
test_n <- !is.na(n[1,4:ncol(n)])[1,]
cur_n <- n[n$FDR < 0.1 & apply(n[,4:ncol(n)], 1, function(x){sum(x[test_n] > 0)}) == sum(test_n),]
if(nrow(cur_n) > 0){
cur_n$GeneName <- rowData(sce)$Symbol[match(rownames(cur_n), rowData(sce)$ID)]
}
}
else{
cur_n <- NULL
}
cur_n
})
write.xlsx(markers.spec, paste("~/Dropbox/Postdoc/2019-12-29_BE2020/Results/Marker_genes/Global_filtered/Marker_genes.xlsx", sep = ""))
saveRDS(sce, "~/Dropbox/Postdoc/2019-12-29_BE2020/All_corrected_sce_filtered.rds")
all_sce <- readRDS("~/Dropbox/Postdoc/2019-12-29_BE2020/All_corrected_sce_filtered.rds")
tissue_sce <- readRDS("~/Dropbox/Postdoc/2019-12-29_BE2020/Tissue_sce_filtered.rds")
tissue.clusters <- vector(length = ncol(all_sce))
names(tissue.clusters) <- paste(colData(all_sce)$Barcode,
colData(all_sce)$Patient,
colData(all_sce)$Tissue, sep = "_")
for(i in 1:length(tissue_sce)){
tissue.clusters[paste(colData(tissue_sce[[i]])$Barcode,
colData(tissue_sce[[i]])$Patient,
colData(tissue_sce[[i]])$Tissue, sep = "_")] <- colData(tissue_sce[[i]])$Tissue_cluster
}
colData(all_sce)$Tissue_cluster <- tissue.clusters
saveRDS(all_sce, "~/Dropbox/Postdoc/2019-12-29_BE2020/All_corrected_sce_filtered.rds")
To finish get session info:
sessionInfo()
## R version 3.6.2 (2019-12-12)
## Platform: x86_64-redhat-linux-gnu (64-bit)
## Running under: Fedora 31 (Workstation Edition)
##
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/R/lib/libRblas.so
##
## locale:
## [1] LC_CTYPE=en_GB.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_GB.UTF-8 LC_COLLATE=en_GB.UTF-8
## [5] LC_MONETARY=en_GB.UTF-8 LC_MESSAGES=en_GB.UTF-8
## [7] LC_PAPER=en_GB.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] destiny_3.0.1 edgeR_3.28.0
## [3] limma_3.42.2 dbscan_1.1-5
## [5] princurve_2.1.4 dynamicTreeCut_1.63-1
## [7] RColorBrewer_1.1-2 umap_0.2.4.1
## [9] Rtsne_0.15 openxlsx_4.1.4
## [11] DropletUtils_1.6.1 scater_1.14.6
## [13] ggplot2_3.2.1 scran_1.14.6
## [15] SingleCellExperiment_1.8.0 SummarizedExperiment_1.16.1
## [17] DelayedArray_0.12.2 BiocParallel_1.20.1
## [19] matrixStats_0.55.0 Biobase_2.46.0
## [21] GenomicRanges_1.38.0 GenomeInfoDb_1.22.0
## [23] IRanges_2.20.2 S4Vectors_0.24.3
## [25] BiocGenerics_0.32.0 rmarkdown_2.1
##
## loaded via a namespace (and not attached):
## [1] readxl_1.3.1 RcppEigen_0.3.3.7.0 igraph_1.2.4.2
## [4] lazyeval_0.2.2 sp_1.3-2 RcppHNSW_0.2.0
## [7] digest_0.6.24 htmltools_0.4.0 viridis_0.5.1
## [10] magrittr_1.5 R.utils_2.9.2 xts_0.12-0
## [13] askpass_1.1 colorspace_1.4-1 rappdirs_0.3.1
## [16] haven_2.2.0 xfun_0.12 dplyr_0.8.4
## [19] crayon_1.3.4 RCurl_1.98-1.1 jsonlite_1.6.1
## [22] hexbin_1.28.1 zoo_1.8-7 glue_1.3.1
## [25] gtable_0.3.0 zlibbioc_1.32.0 XVector_0.26.0
## [28] car_3.0-6 BiocSingular_1.2.1 Rhdf5lib_1.8.0
## [31] DEoptimR_1.0-8 HDF5Array_1.14.2 abind_1.4-5
## [34] VIM_5.1.0 scales_1.1.0 ggplot.multistats_1.0.0
## [37] ggthemes_4.2.0 Rcpp_1.0.3 viridisLite_0.3.0
## [40] laeken_0.5.1 reticulate_1.14 dqrng_0.2.1
## [43] foreign_0.8-72 rsvd_1.0.2 proxy_0.4-23
## [46] vcd_1.4-5 farver_2.0.3 pkgconfig_2.0.3
## [49] R.methodsS3_1.8.0 nnet_7.3-12 locfit_1.5-9.1
## [52] labeling_0.3 tidyselect_1.0.0 rlang_0.4.4
## [55] munsell_0.5.0 cellranger_1.1.0 tools_3.6.2
## [58] ranger_0.12.1 batchelor_1.2.4 evaluate_0.14
## [61] stringr_1.4.0 yaml_2.2.1 knitr_1.28
## [64] zip_2.0.4 robustbase_0.93-5 purrr_0.3.3
## [67] formatR_1.7 R.oo_1.23.0 compiler_3.6.2
## [70] beeswarm_0.2.3 curl_4.3 e1071_1.7-3
## [73] smoother_1.1 tibble_2.1.3 statmod_1.4.33
## [76] stringi_1.4.5 RSpectra_0.16-0 forcats_0.4.0
## [79] lattice_0.20-38 Matrix_1.2-18 vctrs_0.2.2
## [82] pillar_1.4.3 lifecycle_0.1.0 lmtest_0.9-37
## [85] BiocNeighbors_1.4.1 data.table_1.12.8 bitops_1.0-6
## [88] irlba_2.3.3 R6_2.4.1 pcaMethods_1.78.0
## [91] gridExtra_2.3 rio_0.5.16 vipor_0.4.5
## [94] codetools_0.2-16 boot_1.3-23 MASS_7.3-51.4
## [97] assertthat_0.2.1 rhdf5_2.30.1 openssl_1.4.1
## [100] withr_2.1.2 GenomeInfoDbData_1.2.2 hms_0.5.3
## [103] grid_3.6.2 tidyr_1.0.2 class_7.3-15
## [106] DelayedMatrixStats_1.8.0 carData_3.0-3 TTR_0.23-6
## [109] scatterplot3d_0.3-41 ggbeeswarm_0.6.0